package com.oswift.utils.common;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helpers for pulling information out of HTML fragments with regular
 * expressions. This is a lightweight scanner, not a full HTML parser.
 */
public class HtmlUtils
{
    /**
     * Legacy regular expression for an HTML image tag.
     * <p>
     * Kept unchanged for callers that reference it. It is greedy and does not
     * span line breaks, so {@link #getImgSrcFromHtml(String)} uses stricter
     * internal patterns instead of this constant.
     */
    public static final String IMG_REG = "<img.*src=(.*?)[^>]*?>";

    /**
     * Comma separator.
     */
    public static final String COMMA = ",";

    /**
     * Matches one complete {@code <img ...>} tag. {@code [^>]*} also matches
     * line breaks, and cannot run past the end of the tag into a following one.
     */
    private static final Pattern IMG_TAG_PATTERN = Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Matches a {@code src} attribute inside a tag. The value is captured in
     * group 1 (double-quoted), group 2 (single-quoted) or group 3 (unquoted).
     * The look-behind rejects attributes that merely end in "src", such as
     * {@code data-src}.
     */
    private static final Pattern SRC_ATTR_PATTERN = Pattern.compile(
            "(?<![\\w-])src\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))", Pattern.CASE_INSENSITIVE);

    /**
     * Collects the image paths referenced by {@code <img>} tags in an HTML
     * fragment.
     * <p>
     * Tag and attribute names are matched case-insensitively; values may be
     * double-quoted, single-quoted or unquoted. A path is returned only if it
     * contains a '.', and any query string (everything from the first '?',
     * when that is not the first character) is removed.
     *
     * @author zhuou
     * @param html
     *            HTML content; may be {@code null} or empty
     * @return the image paths in document order; an empty, mutable list when
     *         nothing is found (never {@code null})
     */
    public static List<String> getImgSrcFromHtml(String html)
    {
        List<String> imgList = new ArrayList<String>();
        if (html == null || html.length() == 0)
        {
            return imgList;
        }

        Matcher tagMatcher = IMG_TAG_PATTERN.matcher(html);
        while (tagMatcher.find())
        {
            // Only look for src inside the current <img> tag so attributes of
            // neighbouring tags (e.g. <script src=...>) are never picked up.
            Matcher srcMatcher = SRC_ATTR_PATTERN.matcher(tagMatcher.group());
            while (srcMatcher.find())
            {
                String imgurl = srcValue(srcMatcher);
                // The '.' test runs on the raw value, before the query string
                // is removed, matching the historical behaviour.
                if (imgurl.indexOf('.') == -1)
                {
                    continue;
                }
                int queryStart = imgurl.indexOf('?');
                if (queryStart > 0)
                {
                    imgurl = imgurl.substring(0, queryStart);
                }
                imgList.add(imgurl);
            }
        }

        return imgList;
    }

    /**
     * Returns the attribute value from whichever alternative of
     * {@link #SRC_ATTR_PATTERN} matched, with surrounding whitespace removed.
     */
    private static String srcValue(Matcher srcMatcher)
    {
        String value = srcMatcher.group(1);
        if (value == null)
        {
            value = srcMatcher.group(2);
        }
        if (value == null)
        {
            value = srcMatcher.group(3);
        }
        return value.trim();
    }
}
